# Draw a random half of the row indices from each corpus (without
# replacement): 250 of the 501 spam rows and 1275 of the 2551 ham rows.
# s1 indexes spam rows, s2 indexes ham rows.
n_spam_rows <- 501
n_ham_rows <- 2551
s1 <- sample.int(n_spam_rows, size = 250)
s2 <- sample.int(n_ham_rows, size = 1275)

# Run one simple linear regression per feature file.
#
# For each i, the files "newcol<i>" in the spam and easy_ham directories are
# read, the second column is taken at the sampled rows (s1 for spam, s2 for
# ham), and the 0/1 response (1 = spam row, 0 = ham row) is regressed on it.
#
# Results:
#   beta[i, ] - fitted coefficients (column 1 = intercept, column 2 = slope);
#               the slope is NA when it cannot be estimated (e.g. constant x).
#   tstat[i]  - t statistic of the slope, or NA when the slope is NA.
n_fits <- 10000
tstat <- rep(NA_real_, n_fits)
beta <- matrix(NA_real_, nrow = n_fits, ncol = 2)

# The response does not change between fits, so build it once. Its layout
# (spam rows first, then ham rows) must match how x is assembled below.
y <- c(rep(1, length(s1)), rep(0, length(s2)))

for (i in seq_len(n_fits)) {
  print(i)  # progress indicator
  spamdata <- read.table(paste0("S:/Stat 991/spam/", "newcol", i))
  hamdata <- read.table(paste0("S:/Stat 991/easy_ham/", "newcol", i))
  x <- c(spamdata[s1, 2], hamdata[s2, 2])

  # Fit once and reuse the fit for both the coefficients and the t statistic.
  fit <- lm(y ~ x)
  beta[i, ] <- coef(fit)

  # When x carries no usable variation, lm() reports the slope as NA and
  # summary() drops that row from its coefficient table, so only look up the
  # t value when the slope was actually estimated. Otherwise tstat[i] keeps
  # its initial NA.
  if (!is.na(coef(fit)[2])) {
    tstat[i] <- summary(fit)$coefficients[2, 3]
  }
}

# save the objects:
#save(beta2,file="beta2.RData")
#save(tstat2,file="tstat2.RData")

#save(s1,file="s1.RData")
#save(s2,file="s2.RData")

                           